knitr::opts_chunk$set(echo = TRUE)
devtools::install_github("cmf-uchicago/cmfproperty")
## WARNING: Rtools is required to build R packages, but is not currently installed.
##
## Please download and install Rtools 4.0 from https://cran.r-project.org/bin/windows/Rtools/.
## Skipping install of 'cmfproperty' from a github remote, the SHA1 (78054fd9) has not changed since last install.
## Use `force = TRUE` to force installation
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(DBI)
library(dbplyr)
##
## 载入程辑包:'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(cmfproperty)
library(lubridate)
##
## 载入程辑包:'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
con <- dbConnect(RSQLite::SQLite(), "C:/Users/yitia/Desktop/470/part.1/detroit.sqlite")
DBI::dbListTables(con)
## [1] "assessments" "blight" "foreclosures" "parcels"
## [5] "parcels_historic" "sales"
sales <- dplyr::tbl(con, 'sales') %>% dplyr::collect()
assessments <- dplyr::tbl(con, 'assessments') %>% dplyr::collect()
blight <- dplyr::tbl(con, 'blight') %>% dplyr::collect()
foreclosures <- dplyr::tbl(con, 'foreclosures') %>% dplyr::collect()
parcels <- dplyr::tbl(con, 'parcels') %>% dplyr::collect()
parcels_historic <- dplyr::tbl(con, 'parcels_historic') %>% dplyr::collect()
dbDisconnect(con)
sales
assessments
blight
foreclosures
parcels
parcels_historic
sales <- sales %>%
select(parcel_num, sale_date, sale_price)
sales$sale_year <-year(sales$sale_date)
sales
assessments <- assessments %>%
select(PARCELNO , year, ASSESSEDVALUE)
s1 <-sales %>%
group_by(sale_year) %>%
summarise(mean_price = mean(sale_price))
s2 <-sales %>%
group_by(sale_year) %>%
summarise(median_price = median(sales$sale_price))
s3 <-assessments %>%
group_by(year) %>%
summarise(mean_ASSESSEDVALUE = mean(ASSESSEDVALUE))
s4 <-assessments %>%
group_by(year) %>%
summarise(median_ASSESSEDVALUE = median(ASSESSEDVALUE))
join <- s1 %>%
left_join(s2)
## Joining, by = "sale_year"
join1 <- join %>%
left_join(s3, by=c( "sale_year"="year"))
join2 <- join1 %>%
left_join(s4, by=c( "sale_year"="year"))
join2
p <- ggplot(data = join2,
mapping = aes(
x = sale_year,
y = mean_price))
p + geom_line()

p <- ggplot(data = join2,
mapping = aes(
x = sale_year,
y = median_price))
p + geom_line()

p <- ggplot(data = join2,
mapping = aes(
x = sale_year,
y = mean_ASSESSEDVALUE))
p + geom_line()

p <- ggplot(data = join2,
mapping = aes(
x = sale_year,
y = median_ASSESSEDVALUE))
p + geom_line()

#section b
sales <- sales %>%
filter(as.numeric(`sale_price`) > 2500)
assessments <- assessments %>%
filter(year <= 2020)
joined <- sales %>%
left_join(assessments, by=c("parcel_num"="PARCELNO", "sale_year"="year"))
joined <- joined %>%
select(parcel_num, sale_year, sale_price, ASSESSEDVALUE)
ratios <-
cmfproperty::reformat_data(
joined,
sale_col = "sale_price",
assessment_col = "ASSESSEDVALUE",
sale_year_col = "sale_year",
)
## [1] "Filtered out non-arm's length transactions"
## [1] "Inflation adjusted to 2020"
cmfproperty::make_report(ratios,
jurisdiction_name = "Detroit, Michigan")
##
##
## processing file: report.Rmd
##
|
| | 0%
|
|.. | 3%
## inline R code fragments
##
##
|
|.... | 6%
## label: options (with options)
## List of 2
## $ echo : logi FALSE
## $ out.width: chr "175px"
##
##
|
|...... | 9%
## ordinary text without R code
##
##
|
|........ | 11%
## label: setup (with options)
## List of 1
## $ include: logi FALSE
## Joining, by = "TAX_YEAR"
##
|
|.......... | 14%
## inline R code fragments
##
##
|
|............ | 17%
## label: mainbinnedscatter
##
|
|.............. | 20%
## inline R code fragments
##
##
|
|................ | 23%
## label: over under bar
##
|
|.................. | 26%
## ordinary text without R code
##
##
|
|.................... | 29%
## label: iaao graphs
##
|
|...................... | 31%
## inline R code fragments
##
##
|
|........................ | 34%
## label: cod graph
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 row(s) containing missing values (geom_path).
##
|
|.......................... | 37%
## inline R code fragments
##
##
|
|............................ | 40%
## label: prd graph
##
|
|.............................. | 43%
## inline R code fragments
##
##
|
|................................ | 46%
## label: prb graph
##
|
|.................................. | 49%
## inline R code fragments
##
##
|
|.................................... | 51%
## label: standardstable (with options)
## List of 1
## $ results: chr "asis"
##
##
|
|...................................... | 54%
## ordinary text without R code
##
##
|
|........................................ | 57%
## label: results_tbl_values (with options)
## List of 1
## $ results: chr "asis"
##
##
|
|.......................................... | 60%
## ordinary text without R code
##
##
|
|............................................ | 63%
## label: all_asr_plot (with options)
## List of 1
## $ fig.height: num 7
##
|
|.............................................. | 66%
## ordinary text without R code
##
##
|
|................................................ | 69%
## label: sale_ratio_decile_tbl (with options)
## List of 1
## $ results: chr "asis"
##
##
|
|.................................................. | 71%
## ordinary text without R code
##
##
|
|.................................................... | 74%
## label: monte carlo
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
##
|
|...................................................... | 77%
## ordinary text without R code
##
##
|
|........................................................ | 80%
## label: monte carlo graphs (with options)
## List of 1
## $ fig.height: num 5
## Warning: Removed 1 rows containing missing values (geom_vline).
## Warning: Removed 1 rows containing missing values (geom_vline).
## Warning: Removed 1 rows containing missing values (geom_vline).
## Warning: Removed 1 rows containing missing values (geom_vline).
## Warning: Removed 1 rows containing missing values (geom_vline).
##
|
|.......................................................... | 83%
## ordinary text without R code
##
##
|
|............................................................ | 86%
## label: vertical equity methods (with options)
## List of 1
## $ results: chr "asis"
##
##
|
|.............................................................. | 89%
## inline R code fragments
##
##
|
|................................................................ | 91%
## label: diagnostics (with options)
## List of 1
## $ eval: logi FALSE
##
##
|
|.................................................................. | 94%
## inline R code fragments
##
##
|
|.................................................................... | 97%
## label: diagnostics2 (with options)
## List of 1
## $ eval: logi FALSE
##
##
|
|......................................................................| 100%
## ordinary text without R code
## output file: report.knit.md
## "C:/Program Files/RStudio/bin/pandoc/pandoc" +RTS -K512m -RTS report.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc1b0c6e164e69.html --lua-filter "C:\Users\yitia\Documents\R\win-library\4.1\rmarkdown\rmarkdown\lua\pagebreak.lua" --lua-filter "C:\Users\yitia\Documents\R\win-library\4.1\rmarkdown\rmarkdown\lua\latex-div.lua" --self-contained --variable bs3=TRUE --standalone --section-divs --table-of-contents --toc-depth 2 --variable toc_float=1 --variable toc_selectors=h1,h2 --variable toc_collapsed=1 --variable toc_smooth_scroll=1 --variable toc_print=1 --template "C:\Users\yitia\Documents\R\win-library\4.1\rmarkdown\rmd\h\default.html" --no-highlight --variable highlightjs=1 --number-sections --variable theme=sandstone --include-in-header "C:\Users\yitia\AppData\Local\Temp\RtmpWIQTEi\rmarkdown-str1b0c44831079.html" --mathjax --variable "mathjax-url:https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" --citeproc
##
## Output created: Detroit, Michigan.html
## [1] "~~~~~~~~~~~~"
## [1] "geom warnings can be disregarded"
## [1] "Report created at C:/Users/yitia/Desktop/470/part.1/Detroit, Michigan.html"
stats <- cmfproperty::calc_iaao_stats(ratios)
head(stats)
binned <-
cmfproperty::binned_scatter(
ratios,
min_reporting_yr = 2011,
max_reporting_yr = 2020,
jurisdiction_name = "Detroit, Michigan"
)
print(binned[[1]])
## [1] "In 2020, the most expensive homes (the top decile) were assessed at 13.9% of their value and the least expensive homes (the bottom decile) were assessed at 6.8%. In other words, the least expensive homes were assessed at <b>0.49 times</b> the rate applied to the most expensive homes. Across our sample from 2011 to 2020, the most expensive homes were assessed at 15.3% of their value and the least expensive homes were assessed at 229.5%, which is <b>14.97 times</b> the rate applied to the most expensive homes."
binned[[2]]

pct_over <-
cmfproperty::pct_over_under(
ratios,
min_reporting_yr = 2011,
max_reporting_yr = 2020,
jurisdiction_name = "Detroit, Michigan"
)
## Joining, by = "TAX_YEAR"
print(pct_over[[1]])
## [1] "In Detroit, Michigan, <b>72%</b> of the lowest value homes are overassessed and <b>5%</b> of the highest value homes are overassessed."
pct_over[[2]]

#section 3
summary_info <-
cmfproperty::regression_tests(ratios, produce_table = TRUE)
##
## <table style="text-align:center"><tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3">Dependent Variable</td></tr>
## <tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>ASSESSED_VALUE</td><td>log(ASSESSED_VALUE)</td><td>RATIO</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
## <tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">SALE_PRICE</td><td>0.02<sup>***</sup></td><td></td><td>-0.0000<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.0004)</td><td></td><td>(0.0000)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">log(SALE_PRICE)</td><td></td><td>0.05<sup>***</sup></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.003)</td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>15,815.31<sup>***</sup></td><td>8.86<sup>***</sup></td><td>1.19<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(46.94)</td><td>(0.03)</td><td>(0.004)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
## <tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>112,731</td><td>112,731</td><td>112,731</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.03</td><td>0.003</td><td>0.05</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.03</td><td>0.003</td><td>0.05</td></tr>
## <tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
kableExtra::kable(summary_info)
|
Model
|
Value
|
Test
|
T Statistic
|
Conclusion
|
Model Description
|
|
paglin72
|
15815.3136120
|
> 0
|
336.89233
|
Regressive
|
AV ~ SP
|
|
cheng74
|
0.0529021
|
< 1
|
19.75547
|
Regressive
|
ln(AV) ~ ln(SP)
|
|
IAAO78
|
-0.0000023
|
< 0
|
-77.10270
|
Regressive
|
RATIO ~ SP
|
|
kochin82
|
0.0652176
|
< 1
|
19.75547
|
Regressive
|
ln(SP) ~ ln(AV)
|
|
bell84
|
15048.4638366
|
> 0
|
307.98776
|
Regressive
|
AV ~ SP + SP^2
|
|
|
0.0000000
|
< 0
|
-50.34653
|
Regressive
|
AV ~ SP + SP^2
|
|
sunderman90
|
-6345.3808588
|
> 0
|
-27.12189
|
Progressive
|
AV ~ SP + low + high + low * SP + high * SP
|
#section 4
foreclosures <- foreclosures %>%
select(c(`2002`,`2003`,`2004`,`2006`,`2007`,`2008`,`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`))
foreclosures[is.na(foreclosures)] <- 0
f <- colSums(foreclosures)
f
## 2002 2003 2004 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
## 246 2484 2034 5108 2252 4111 7986 11632 12999 20040 18785 23845 24340
## 2016 2017 2018 2019
## 12590 6289 3880 2748
frame <- data.frame(
year = c(2002,2003,2004,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019),
foreclosures_num =c(246,2484, 2034,5108,2252,4111,7986,11632,12999,20040,18785,23845,24340,12590,6289,3880,2748))
frame
join_f<- ratios %>%
left_join(frame, by=c( "SALE_YEAR"="year"))
join_f
m1 <- lm(RATIO ~ foreclosures_num, data=join_f)
summary(m1)
##
## Call:
## lm(formula = RATIO ~ foreclosures_num, data = join_f)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5384 -0.6979 -0.3060 0.3041 6.6066
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.869e-01 7.269e-03 53.22 <2e-16 ***
## foreclosures_num 4.731e-05 4.464e-07 105.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.219 on 114468 degrees of freedom
## (因为不存在,3270个观察量被删除了)
## Multiple R-squared: 0.08937, Adjusted R-squared: 0.08936
## F-statistic: 1.123e+04 on 1 and 114468 DF, p-value: < 2.2e-16